#-*-coding:utf-8-*-

import urllib2
import urllib
import re
from random import randint
from renren import RenRen
from BeautifulSoup import BeautifulSoup
def post(url, values):
    """Send *values* as a form-encoded request body to *url*.

    Args:
        url: Address of the endpoint to call.
        values: Mapping (or sequence of pairs) accepted by
            ``urllib.urlencode``; it becomes the request body.

    Returns:
        The raw response body as returned by ``read()``.

    Raises:
        urllib2.URLError: propagated unchanged from ``urllib2.urlopen``.
    """
    data = urllib.urlencode(values)
    req = urllib2.Request(url, data)
    req.add_header("User-Agent", "Mozilla/4.0 (compatible; MSIE 6.0; WindowsNT)")
    response = urllib2.urlopen(req)
    try:
        return response.read()
    finally:
        # Release the connection even when reading the body fails part-way;
        # previously the response object was never closed explicitly.
        response.close()

def parse(text):
    """Clean up a status text and split it into words via the remote service.

    The text has its ``<...>`` tags removed, its backslash escapes (such as
    ``\\uXXXX``) decoded, and is then posted UTF-8 encoded to the
    wong2seg.sinaapp.com endpoint. The decoded text is also printed to
    stdout.

    Args:
        text: Status text, either a byte string or a ``unicode`` object.

    Returns:
        A list of the non-empty, whitespace-stripped pieces obtained by
        splitting the service response on single spaces.

    Raises:
        UnicodeDecodeError: if *text* contains a malformed backslash escape.
        urllib2.URLError: propagated from ``post`` on network failure.
    """
    text = re.sub(r'<[^>]+>', '', text).strip()
    # SECURITY: this step used to be ``eval("u'%s'" % text)``. The text comes
    # from the network, so any status containing a quote could inject code
    # (and a harmless quote or newline raised SyntaxError). The
    # ``unicode_escape`` codec decodes the same escapes without executing
    # anything.
    if isinstance(text, unicode):
        # Turn non-ASCII characters into escapes first so the codec below
        # restores them unchanged instead of failing on implicit encoding.
        text = text.encode("ascii", "backslashreplace")
    text = text.decode("unicode_escape").encode("utf-8")
    print(text)
    url = "http://wong2seg.sinaapp.com/index.php"
    html = post(url, {"word": text})
    pieces = (chunk.strip() for chunk in html.split(" "))
    return [piece for piece in pieces if piece]

def _count_words(statuses):
    """Return a dict mapping each accepted word to its number of occurrences.

    Each status contributes its "content" (prefixed by "rootContent" when
    that key is present). rrurl.cn short links are removed before the text
    is handed to ``parse``.
    """
    short_link = re.compile(r'http://rrurl\.cn/\w+')
    counts = {}
    for status in statuses:
        content = status["content"]
        if "rootContent" in status:
            content = status["rootContent"] + content
        content = short_link.sub("", content)
        try:
            words = parse(content)
        except Exception:
            # Best effort: a status that cannot be segmented is skipped.
            # ``Exception`` rather than a bare ``except`` so that Ctrl-C and
            # SystemExit still stop the crawl.
            words = []
        for word in words:
            # len() is a byte length for the byte strings parse() returns
            # under Python 2 -- presumably meant to drop single CJK
            # characters (3 bytes each in UTF-8); confirm.
            if len(word) > 3 and word != "http":
                counts[word] = counts.get(word, 0) + 1
    return counts


def _write_cloud(counts, path):
    """Write the words seen more than once to *path* as coloured HTML spans.

    Each span gets a random RGB colour and a font size of ``count * 4 + 30``
    pixels. Words are emitted in reversed dict-iteration order; they are not
    sorted by frequency.
    """
    repeated = [item for item in counts.items() if item[1] > 1]
    repeated.reverse()
    # ``with`` guarantees the file is closed even if a write fails.
    with open(path, "w") as fp:
        fp.write("<div style='width: 800px;'>")
        for word, count in repeated:
            fp.write('<span style="color:rgb(%d,%d,%d);font-size:%dpx">%s</span>' % (randint(1, 255), randint(1, 255), randint(1, 255), count * 4 + 30, word))
        fp.write("</div>")


def main():
    """Log in, count the words in one user's statuses, write result.html."""
    # NOTE(review): credentials are hard-coded in source; consider moving
    # them to configuration or the environment.
    uname = "200814075"
    pwd = "2oo814o75"
    print("Login...")
    rr = RenRen(uname, pwd)
    if rr.login():
        print("Login success")
    else:
        # NOTE(review): the script carries on after a reported login failure,
        # as it always has -- confirm whether it should stop here instead.
        print("Login failed")
    counts = _count_words(rr.getStatusGenerator(280215520))
    _write_cloud(counts, "result.html")


if __name__ == "__main__":
    main()
